# Batch size during inference. 
infer_batch_size: 8
max_out_len: 32

# Hyparams for model:
# These following five vocabularies related configurations will be set
# automatically according to the passed vocabulary path and special tokens.
# Size of source word dictionary.
src_vocab_size: 38512
# Size of target word dictionay
trg_vocab_size: 38512
# Index for <bos> token
bos_idx: 0
# Index for <eos> token
eos_idx: 1
# Index for <unk> token
unk_idx: 2
# Max length of sequences deciding the size of position encoding table.
max_length: 32
# The dimension for word embeddings, which is also the last dimension of
# the input and output of multi-head attention, position-wise feed-forward
# networks, encoder and decoder.
d_model: 512
# Size of the hidden layer in position-wise feed-forward networks.
d_inner_hid: 2048
# Number of head used in multi-head attention.
n_head: 8
# Number of sub-layers to be stacked in the encoder.
num_encoder_layers: 6
# Number of sub-layers to be stacked in the decoder.
num_decoder_layers: 6
# Dropout rates.
dropout: 0.1
# The flag indicating whether to share embedding and softmax weights.
# Vocabularies in source and target should be same for weight sharing.
weight_sharing: True

# Path of trained parameter, to make prediction
init_from_params: base_trained_models/step_final/
